####################################################################################

#What You See and What You Get
#This file conducts the analysis for the W1 and W2 surveys.
#Code to create the Tables, Figures, and data cited in the main text and appendix,
#using the Routputs created by running the previous three files (see documentation).


####################################################################################

# Start from an empty workspace: removes every object that ls() lists in the
# current environment before anything else runs.
rm(list=ls())
# Packages attached for the analysis. When two packages export the same name,
# the one attached later masks the earlier one, so the order is left untouched.
library(AER)
library(tidyverse)
library(readxl)
library(lubridate)
library(sandwich)
library(xtable)
library(corrgram)
library(forcats)
library(estimatr)
library(fastDummies)
library(randomizr)
library(ANOVAreplication)
library(miceadds)
library(clubSandwich)
library(texreg)
# Bias printing towards fixed notation: scientific notation is used only when
# the fixed representation is more than 6 digits wider.
options(scipen=6)

# Project helper functions. The path is relative, so the script must be run
# with the project root as working directory.
# NOTE(review): the contents of Code/functions.R are not visible from here.
source("Code/functions.R")

# Data for Table A.1.
# The table itself is typeset in 3_analysis_output_WW.R.
# This block cannot be run by replicators because it reads datasets that
# contain identifying information; it is therefore disabled with `if (FALSE)`.
# Its output, saved at the end as Routputs/out-a1.RData, is provided instead.
if (FALSE) { # this table uses identified datasets

  # ---- Panel (a): non-winners and winners per lottery notice (edital) ----
  # The two files are expected to define `sorteadosf` and `inscritosf`; rows of
  # `sorteadosf` are counted as winners, rows of `inscritosf` as all enrolled.
  load("Data/sorteados_october2018.Rda")
  load("Data/inscritos_october2018.Rda")

  # Lottery notices in the column order of the table.
  editais <- c("edital03.2011", "edital06.2011",
               "edital17.2016", "edital20.2016")

  # Number of rows per edital (rows with a missing id_edital are not counted).
  winners <- vapply(
    editais,
    function(e) sum(sorteadosf$id_edital == e, na.rm = TRUE),
    integer(1),
    USE.NAMES = FALSE
  )
  enrolled <- vapply(
    editais,
    function(e) sum(inscritosf$id_edital == e, na.rm = TRUE),
    integer(1),
    USE.NAMES = FALSE
  )
  nonwinners <- enrolled - winners

  # Drop the identified data as soon as the counts are computed.
  rm(sorteadosf, inscritosf)

  # Row 1 = non-winners, row 2 = winners; one column per edital.
  tb.a1a <- matrix(c(nonwinners, winners), nrow = 2, ncol = 4, byrow = TRUE)

  # ---- Panel (b): pre-sample, contacted and interviewed counts ----

  # Pre-sample, early lotteries: distinct cpfstd in each of the two W2 samples
  # (counted separately and added up, so an id present in both counts twice).
  load("Data/data_sample_W2.1.RData")
  load("Data/data_sample_W2.2.RData")
  pre_e <- length(unique(W2.1$cpfstd)) + length(unique(W2.2$cpfstd))

  # Pre-sample, recent lotteries.
  load("Data/all_samples1_2.Rda")
  pre_r <- length(unique(all_samples$cpfstd))

  # Contacted, early lotteries.
  bd <- read_csv("Data/fgv_mcmv_campo.csv")
  int_he <- interval(ymd("2020-05-24"), ymd("2020-06-01")) # high-effort interviews
  # Exclude interviews conducted in the high-effort period. Note that filter()
  # also drops rows whose `he` is NA (e.g. dates that ymd() could not parse).
  bd <- bd %>%
    mutate(dates = ymd(data_de_campo),
           he = ifelse(dates %within% int_he, 1, 0)) %>%
    filter(he != 1)

  contactede <- bd %>%
    filter(b1 == 1) %>%
    pull(fieldid) %>%
    unique() %>%
    length()

  # Contacted, recent lotteries.
  load("Data/survey_late.Rda")
  load("Data/survey_late2.Rda")

  # Named `survey_recent` rather than `all` to avoid masking base::all().
  survey_recent <- bind_rows(survey_late, survey_late2)
  contactedr <- survey_recent %>%
    filter(b1 == 1) %>%
    pull(cpfstd) %>%
    unique() %>%
    length()

  # Interviewed, early lotteries. Both survey files are expected to define an
  # object called `survey`, hence the rm() between the two loads.
  load("Data/surveyW2.Rda")
  inte <- length(unique(survey$fieldID))
  rm(survey)

  # Interviewed, recent lotteries.
  load("Data/surveyW1.RData")
  intr <- length(unique(survey$fieldID))
  rm(survey)

  # Rows: pre-sample, contacted, interviewed. Columns: early, recent lotteries.
  tb.a1b <- matrix(c(pre_e, contactede, inte,
                     pre_r, contactedr, intr),
                   nrow = 3, ncol = 2)

  # ---- Save the anonymized counts ----
  out.a1 <- list(outa1a = tb.a1a,
                 outa1b = tb.a1b)
  comment(out.a1) <- paste0("Created in ", Sys.Date(),
                            ". Anonymized data for table A.1.")
  save(out.a1, file = "Routputs/out-a1.RData")
} # end table A.1

# Generate an object flagging who enrolled in each lottery, grouped by wave.
# This is mentioned in the paper; the text that refers to these data is
# typeset in 3_analysis_output_WW.R.
# This block cannot be run by replicators because it requires identified data,
# so it is disabled with `if (FALSE)`. It saves
# Routputs/out-inscritosearlylate.RData, which is provided for replication.
# NOTE(review): this comment previously called the provided file
# tmp_inscritosearlylate.RData -- confirm the name shipped in the package.
if (FALSE) { # requires identified data
  # Read the "inscritos" file ########
  load("Data/inscritos_october2018.Rda")

  # For each lottery notice (edital): keep only the person identifiers and add
  # a TRUE flag column named after the edital.
  i03.2011 <- inscritosf %>%
    filter(id_edital == "edital03.2011") %>%
    select(names.clean, cpfstd) %>%
    mutate(edital.03.2011 = TRUE)

  i06.2011 <- inscritosf %>%
    filter(id_edital == "edital06.2011") %>%
    select(names.clean, cpfstd) %>%
    mutate(edital.06.2011 = TRUE)

  i17.2016 <- inscritosf %>%
    filter(id_edital == "edital17.2016") %>%
    select(names.clean, cpfstd) %>%
    mutate(edital.17.2016 = TRUE)

  i20.2016 <- inscritosf %>%
    filter(id_edital == "edital20.2016") %>%
    select(names.clean, cpfstd) %>%
    mutate(edital.20.2016 = TRUE)

  # Full joins keep everyone who appears in either table; a flag is NA for a
  # person who did not enroll in the corresponding edital / wave.
  join_keys <- c("cpfstd", "names.clean")

  # 2011 lotteries are flagged as w2, 2016 lotteries as w1.
  insc1 <- full_join(i03.2011, i06.2011, by = join_keys)
  insc1$w2 <- TRUE
  insc2 <- full_join(i17.2016, i20.2016, by = join_keys)
  insc2$w1 <- TRUE

  insc <- full_join(insc1, insc2, by = join_keys)
  rm(insc1, insc2)

  # Anonymize: drop the identifying columns, keeping only the flag columns.
  insc <- insc %>% select(-names.clean, -cpfstd)
  comment(insc) <- paste0("Created in ", Sys.Date(),
                          ". Anonymized participation data.")
  save(insc, file = "Routputs/out-inscritosearlylate.RData")
} # end inscritosearlylate


